import os
import re
from openpyxl import Workbook

# book = Workbook()
# sheet = book.active
# wb = Workbook(write_only=True)
# wb = Workbook()
# ws = wb.create_sheet()
# 54431学腋：推广美棉办法：收买实棉细则农商部农林司编安徽实业杂志1919年10月,(复刊)第28期民国八年十月清华大学图书馆(北京)
# 54432学腋：用滑车改良新式翻车图说汪珍投稿安徽实业杂志1919年10月,(复刊)第28期民国八年十月清华大学图书馆(北京)
# with open(r"D:\dl\bs\DBF\Jdqk.DAT", 'r', encoding='utf8', errors='ignore') as f:
# Pattern for one record: leading digits, some text, a single separator
# (ASCII comma, full-width comma, or the character 期), then the remainder.
# Only the commented-out parsing code refers to it at the moment.
reg_cols = re.compile(r'^(\d+)(.+)[,，期](.+)')

# Decode the source as GBK (undecodable bytes are silently dropped) and cut
# the whole text at every space character.
with open(r"D:\dl\bs\DBF\Jdqk.DAT", 'r', encoding='gbk', errors='ignore') as src:
    fragments = src.read().split('\x20')

    # Keep only fragments longer than four characters; the length is measured
    # on the raw fragment, before any cleaning. Each kept fragment is trimmed
    # and has any remaining CR/LF characters removed.
    lines = []
    for fragment in fragments:
        if len(fragment) <= 4:
            continue
        lines.append(fragment.strip().replace('\r', '').replace('\n', ''))
    print(len(lines))

    # Echo every cleaned fragment to stdout and store it in dat.txt (UTF-8),
    # one fragment per line.
    with open('dat.txt', "w", encoding='utf-8') as dst:
        for line in lines:
            print(line)
            dst.write(line + '\n')
        # if ms:
        #     row = ms[0]
        #     # print(row)
        #     # ws.append(row)
        # else:
        #     print(line)

# wb.save("output.xlsx")

# # for line in lines[-5:]:
# #     print(line)
# for line in lines[3000:3005]:
#     print(line)
